from lxml import etree

# Sample markup for the demo. Note that the first "item-1" anchor has no href
# attribute and the last <li> is never closed.
html_str = """
<div> <ul>
        <li class="item-1"><a>first item</a></li>
        <li class="item-1"><a href="link2.html">second item</a></li>
        <li class="item-inactive"><a href="link3.html">third item</a></li>
        <li class="item-1"><a href="link4.html">fourth item</a></li>
        <li class="item-0"><a href="link5.html">fifth item</a>
        </ul> </div>
"""

# Build an element tree from the markup string.
html = etree.HTML(html_str)

# Select the <a> element under every li whose class is "item-1" and read the
# href and the text from that same element. Querying all hrefs and all texts
# as two separate lists and pairing them by position misaligns them: an anchor
# without an href contributes to the text list but not to the href list, so
# every later href ends up paired with the wrong text.
for anchor in html.xpath("//li[@class='item-1']/a"):
    item = {
        "href": anchor.get("href"),  # None when the anchor has no href
        "text": anchor.text,
    }
    print(item)
